from urllib.request import urlopen
import re

# The page contains Chinese text, so decode() the bytes returned by read().
# Decoding as UTF-8 turns the raw bytes into a str in which the Chinese
# characters display correctly.
# Download the demo page. The response is used as a context manager so the
# underlying HTTP connection is closed as soon as the body has been read.
with urlopen("https://morvanzhou.github.io/static/scraping/basic-structure.html") as response:
    # read() yields raw bytes; decode them as UTF-8 to obtain text.
    html = response.read().decode('utf-8')
print(html)

# Capture the text between <title> and </title> (non-greedy, single line).
res = re.findall(r"<title>(.+?)</title>", html)
# Guard the lookup: a page without a matching <title> prints None instead of
# raising IndexError on res[0].
print("\nPage title is: ", res[0] if res else None)

# Collect the value of every href="..." attribute found in the page.
res = re.findall(r'href="(.*?)"', html)
print("\nAll links: ", res)